library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(gapminder)
library(xlsx)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
## The following object is masked from 'package:plotly':
##
## rename
# Read the first worksheet of an indicator workbook into a data frame.
#
# path: path to the .xlsx file to read.
# Returns the data frame built by read.xlsx(), using the first row as the
# header, UTF-8 encoding, and character (not factor) string columns.
read_indicator_sheet <- function(path) {
  read.xlsx(
    path,
    sheetIndex = 1,
    header = TRUE,
    as.data.frame = TRUE,
    stringsAsFactors = FALSE,
    encoding = "UTF-8"
  )
}

# One workbook per indicator; all three are read with identical settings.
population_xls <- read_indicator_sheet("indicator gapminder population.xlsx")
fertility_xls <- read_indicator_sheet("indicator undata total_fertility.xlsx")
lifeexp_xls <- read_indicator_sheet("indicator life_expectancy_at_birth.xlsx")
library(viridis)
## Loading required package: viridisLite
# ---- Reshape the three indicators and join them per country and year ----

# The year columns selected from the imported sheets are named X1962..X2015.
myvars <- paste0("X", 1962:2015)

# Keep the country-name column (named differently in each sheet) plus the
# year columns, then give the first column a common name.
population <- population_xls[c("Total.population", myvars)]
fertility <- fertility_xls[c("Total.fertility.rate", myvars)]
lifeexp <- lifeexp_xls[c("Life.expectancy", myvars)]
colnames(population)[1] <- "Country"
colnames(fertility)[1] <- "Country"
colnames(lifeexp)[1] <- "Country"

# Only the first 275 rows of these two sheets are used.
# NOTE(review): presumably the rows after 275 are not country data; the
# fertility sheet is not truncated the same way -- confirm this is intended.
lifeexp <- lifeexp[1:275, ]
population <- population[1:275, ]

# Wide (one column per year) -> long (one row per Country/variable pair).
population_m <- melt(population, id.vars = "Country")
lifeexp_m <- melt(lifeexp, id.vars = "Country")
fertility_m <- melt(fertility, id.vars = "Country")
colnames(population_m)[3] <- "pop"
colnames(lifeexp_m)[3] <- "life"
colnames(fertility_m)[3] <- "fert"

# Inner joins on country and year column; merge() has no `header` argument,
# so the one previously passed here was dropped.
mydf <- merge(lifeexp_m, fertility_m, by = c("Country", "variable"))
mydf <- merge(mydf, population_m, by = c("Country", "variable"))

# Country -> continent lookup taken from the gapminder dataset. The country
# column is renamed by name rather than by position so the result does not
# depend on the column order distinct() returns.
continent <- gapminder %>% distinct(country, continent)
continent <- data.frame(lapply(continent, as.character), stringsAsFactors = FALSE)
colnames(continent)[colnames(continent) == "country"] <- "Country"

# Keep only countries that have a continent in the lookup and attach it.
mydf_filter <- mydf %>% filter(Country %in% continent$Country)
mydf_filter <- merge(mydf_filter, continent, by = "Country")

# Convert each column to its working type. as.character() is applied first
# so the conversion is also correct if a column arrived as a factor.
mydf_filter$Country <- as.character(mydf_filter$Country)
mydf_filter$continent <- as.character(mydf_filter$continent)
mydf_filter$variable <- as.integer(gsub("X", "", as.character(mydf_filter$variable)))
colnames(mydf_filter)[colnames(mydf_filter) == "variable"] <- "year"
mydf_filter$pop <- round(as.numeric(as.character(mydf_filter$pop)) / 1000000, 1)
mydf_filter$fert <- as.numeric(as.character(mydf_filter$fert))
mydf_filter$life <- as.numeric(as.character(mydf_filter$life))

# Drop country-years with no life expectancy or fertility value instead of
# replacing them with 0: a zero would be plotted as a real observation and
# would pull the fitted curves toward the origin.
mydf_filter <- mydf_filter[complete.cases(mydf_filter[c("life", "fert")]), ]
rownames(mydf_filter) <- NULL
# ---- Interactive fertility vs. life expectancy chart ----

# Key the data on Country so that highlight() below acts on all rows that
# share a Country value.
gapfertility <- mydf_filter
gapfertility <- highlight_key(gapfertility, ~Country)

# Scatter of fertility against life expectancy, coloured by continent, with a
# loess curve per continent.
# alpha and shape are written as constants inside aes(), so ggplot2 treats
# them as mapped aesthetics with scales of their own; the scale_alpha() and
# scale_shape() calls exist only to hide the legends those scales would add.
# NOTE(review): if literally tiny "." points at very low opacity were
# intended, the values would have to be set outside aes() -- confirm.
# NOTE(review): no layer maps `fill`, so scale_fill_discrete() looks like it
# has no visible effect -- confirm before removing.
gg_fert <- ggplot(data = gapfertility, aes(x = life, y = fert, color = continent)) +
  geom_point(aes(alpha = 1 / 100000, shape = ".")) +
  # formula is spelled out (it is the default) to avoid the informational
  # "`geom_smooth()` using formula 'y ~ x'" message at build time.
  geom_smooth(method = "loess", formula = y ~ x, size = 2) +
  scale_color_viridis(discrete = TRUE) +
  labs(x = "Life Expectancy", y = "Fertility", color = "Continent", shape = NA) +
  scale_fill_discrete(name = "Continent", labels = c("Africa", "Americas", "Asia", "Europe", "Oceania")) +
  # guide = "none" replaces the deprecated guide = FALSE.
  scale_alpha(guide = "none") +
  scale_shape(guide = "none")

# Convert to a plotly widget and highlight on hover.
# NOTE(review): "Country" is not one of the aesthetics mapped above -- verify
# that the tooltip actually shows the country name.
gg_fertly <- ggplotly(gg_fert, tooltip = "Country") %>%
  highlight(on = "plotly_hover")

gg_fertly
## Setting the `off` event (i.e., 'plotly_doubleclick') to match the `on` event (i.e., 'plotly_hover'). You can change this default via the `highlight()` function.
The data come from the gapminder package and from several xlsx files downloaded from gapminder.com. I’m trying to show that as a country’s citizens gain life expectancy, fertility drops off rapidly. The chart is meant to let the viewer highlight an individual country’s data points along with each continent’s loess regression curve.